home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
MacWorld 1999 July
/
Macworld (1999-07).dmg
/
Shareware World
/
Info
/
For Developers
/
Mops 3.4.sea
/
Mops source
/
PPC source
/
pnuc1
< prev
next >
Wrap
Text File
|
1999-02-21
|
28KB
|
1,103 lines
\ Objects, constants, values etc.
forward INTERPRET \ Not a vector any more - we never used that feature
forward REFILL
forward FREFILL
forward THROW
false value RAinMod \ Set if a relocatable address is in a module
false value echo? \ Set if we're echoing during first stage load
false value err_info_valid?
variable #TIB
variable >IN
0 value LATEST
0 value CURR-DEF
0 value FENCE
0 value SRC-START
0 value SRC-LEN
0 value SOURCE-ID
0 value ACTW
0 value OUT
0 value #lines_read
0 value STATE
false value CSTATE
10 value BASE
-1 value DPL
0 value HLD
false value CASE_IN_NAMES?
0 value throwHandler
big# value DotStkLim
-1 value SLEEPTICKS
5 constant PROCESSOR \ let's call PowerPC processor 5
true constant AppleEvents? \ AppleEvents are always available on PPC
true constant GestaltAvail? \ Likewise for Gestalt
\ The following values are used internally by Mops.
0 value CD_gpr#
0 value TO_gpr#
0 value const_data_start
0 value CD_GPR_loc
0 value savedRP
0 value MMRgn
0 value meth_seg#
$ BFFBFFBF , \ marker so we can easily recognize the
\ execution buffer
variable exBuff 512 allot \ the buffer
$ 98765432 , \ marker so we can recognize the end
0 value exBuff_offs
false value MRopen?
false value initzed?
0 value quitapp?
0 value frNxtDP
0 value (err#)
0 value loc#
0 value #P
0 value #PL
0 value #FP
0 value #FPL
0 value #VL
0 value tempObj_block_size \ if nonzero, this is the size of the extra
\ part of the return stack frame, used for
\ temp objects.
false value tempObjs? \ true if we have temp objects. We can't rely on
\ tempObj_block_size being nonzero, since they
\ might all be in registers.
0 value releaseTemps_xt
0 value fltflg
0 value local?
0 value localSect?
0 value method?
0 value ^meth_link
0 value selfref?
0 value objclass
0 value #1st
0 value #last
0 value heldMod
0 value heldModStart
\ 0 value heldModBase
0 value methindex
false value sacomp?
true value relocchk?
false value inhibitmb?
0 value sups2skip
false value savingdic?
BL constant BL
8 constant #THREADS
big# constant BIG#
-300 constant FILE-MARK
\ Some handler code values that we need to be able to access from
\ above the nucleus:
classCode constant CLASSCODE
objcode constant OBJCODE
FvalCode constant FVALCODE
true value CURS?
true value UCFLAG
inlMk constant INLMK
0 value currBase
0 value colAflg
variable tempVbl 16 allot
\ ==============================
\ SYSTEM VECTORS
\ ==============================
(*
['] (emit) -> emitvec
['] (cr) -> crvec
['] (type) -> typevec
['] (spaces) -> spvec
['] (emit) -> echovec
['] (sf) -> setfWind
\ 0 -> quitvec \ mh May94 - quit doesn't get changed any more
['] bye -> byevec
*)
' null sVect HEADER
: rtnFalse 0 ;
' null sVect LOGVEC
\ ' rtnFalse sVect UFIND
dynamicVect EXTRAFIND
\ ' null sVect NUMACCUMULATE
' null sVect PAUSE
' null sVect ?PAUSE
' null sVect GETSPACE
' null sVect RNGERR
' null sVect $ERR
' null sVect ARITHERR
\ ' null sVect EXTRA_INITS
' null sVect ERRORVEC
' null sVect QUITVEC
' null sVect ABORTVEC
' null sVect SETFWIND
\ ' null sVect DIE
\ ' null sVect DFLT-DIE
\ ' null sVect FREFILL
' null sVect MODLOAD
\ ' null sVect TEIDLE
' null sVect COMPINLINE
\ ' null sVect INTERPRET
' null sVect OPENAPPVEC
' null sVect OPENDOCVEC
' null sVect PRINTDOCVEC
' null sVect QUITAPPVEC
' null sVect READ1DOCVEC
' null vect TEidle_vect
\ ' null vect codeGen_vect
\ ========= dummy words used for accessing locals (see zArgs) =========
$ BC0D dummy_op LOCPARM
$ BC25 dummy_op FLOCPARM
\ ======== Dictionary header address conversion ========
: ?DP ;
\ TRAVERSE converts an addr pointing to one end of the name field
\ to one pointing to the other.
: TRAVERSE { addr dirn \ cnt -- addr' }
32 -> cnt
dirn 0>=
IF \ going up
addr c@ $ 1F and
4+ -4 and 1- ++> addr
ELSE \ going down
BEGIN
1 --> addr
addr c@x 0< IF addr EXIT drop THEN
1 --> cnt
cnt
NUNTIL
THEN
addr
;
: N>LINK 4- ;
: L>NAME 4+ ;
: NAME> 1 traverse 3+ ;
: LINK> L>name name> ;
\ >BODY ( xt -- dfa ) has to go to the data area for variables, values
\ etc. This isn't (and can't ever be) standard, since these kinds of
\ words don't have an 'xt' under the standard. But in Mops, you can
\ tick them, and use >BODY on the result to get to the data.
: >BODY 2+ @abs ;
: >NAME 3- -1 traverse ;
: >LINK >name n>link ;
: >HDLR 2- ;
\ ==============================
\ STACK MANIPULATION
\ ==============================
$ BD0F $ 6200 special_op DUP
$ BD0F $ 6300 special_op 2DUP
$ BD0F $ 6400 special_op DROP
$ BD0F $ 6500 special_op 2DROP
$ BD0F $ 6600 special_op SWAP
$ BD0F $ 6700 special_op OVER
$ BD0F $ 6800 special_op NIP
$ BD0F $ 6900 special_op TUCK
$ BD0F $ 6A00 special_op ROT
$ BD0F $ 6B00 special_op DOWN
$ BD0F $ 6B00 special_op -ROT \ these are synonyms
$ BD0F $ 6C00 special_op 2SWAP
\ FP stack ops:
$ BD0F $ 7200 special_op FDUP
$ BD0F $ 7300 special_op F2DUP
$ BD0F $ 7400 special_op FDROP
$ BD0F $ 7500 special_op F2DROP
$ BD0F $ 7600 special_op FSWAP
$ BD0F $ 7700 special_op FOVER
$ BD0F $ 7800 special_op FNIP
$ BD0F $ 7900 special_op FTUCK
$ BD0F $ 7A00 special_op FROT
$ BD0F $ 7B00 special_op FDOWN
$ BD0F $ 7C00 special_op F2SWAP
\ I use the following in inline code sequences here in the nucleus, before
\ locals are avaliable. And they're also handy in inlines.
$ BD0F $ 6D00 special_op 2PICK
$ BD0F $ 6E00 special_op 3PICK
$ BD0F $ 6F00 special_op 3ROLL
:ppc_code PICK
r4 0 cmpi, \ is it 0 pick?
eq if,
r4 r3 r3 or, \ yes - copy TOS
else,
r5 r4 2 0 29 rlwinm, \ no - mult index by 4
r5 r5 -4 addi, \ and subtract 4 to get SP offset
r4 r18 r5 lwzx, \ grab the cell
then,
blr, \ and return.
;ppc_code
\ =============================
\ SIMPLE ARITHMETIC
\ =============================
$ BD06 otAdd special_op +
$ BD06 otSub special_op -
$ BD06 otMul special_op *
$ BD06 otMul special_op *W \ don't need this as a separate op on PPC
$ BD06 otMulh special_op *HI \ will normally only be used internally
$ BD06 otUMulh special_op *UHI \ ditto
$ BD06 otDiv special_op /
$ BD06 otUDiv special_op U/
: M* ( n1 n2 -- d ) inline{ 2dup * down *hi} ;
: UM* ( u1 u2 -- ud ) inline{ 2dup * down *uhi} ;
\ we need um* - the standard sez so!!
\ Special arith ops to get us specific instructions, mainly for use
\ in inline sequences. We leave it as an exercise for the reader
\ to work out what the instructions are.
$ BD06 otAddc special_op __addc
$ BD06 otAdde special_op __adde
$ BD06 otAddze special_op __addze
$ BD06 otSubfc special_op __subfc
$ BD06 otSubfe special_op __subfe
$ BD06 otSubfze special_op __subfze
\ NEGATE and DNEGATE. The latter can be done with the special ops we just
\ defined.
$ BD06 otNEG special_op NEGATE
: DNEGATE inline{ swap 0 __subfc swap __subfze} ;
\ We need D+ in number input, so we might as well put D- here as well.
\ These sequences do the job in 2 or 3 instructions.
: D+ ( d1 d2 -- d3 )
inline{ swap 3roll __addc down __adde} ;
: D- ( d1 d2 -- d3 )
inline{ swap 3roll swap __subfc down __subfe} ;
\ FP:
$ BD06 otFADD special_op F+
$ BD06 otFSUB special_op F-
$ BD06 otFMUL special_op F*
$ BD06 otFDIV special_op F/
$ BD06 $ 54 special_op FABS
$ BD06 $ 55 special_op FNEGATE
\ Shifts:
$ BD30 $ 2A00 special_op <<
$ BD30 $ 2A00 special_op LSHIFT
$ BD30 $ 2A01 special_op >>
$ BD30 $ 2A01 special_op RSHIFT
$ BD30 $ 2A03 special_op A>>
\ the following inline definitions use some ops like > which we haven't
\ defined in the nucleus image yet. But since inlines use EVALUATE,
\ as long as the ops are defined somewhere in the nucleus there should
\ be no problem, since we precompile the nucleus.
: 2* inline{ dup +} ;
: 4* inline{ 2 <<} ;
: 2/ inline{ 1 a>>} ;
: 4/ inline{ 2 a>>} ;
: UNDER+ \ ( a b c -- a+c b )
inline{ rot + swap} ;
: MAX inline{ 2dup >= dup not rot and down and or} ;
: MIN inline{ 2dup < dup not rot and down and or} ;
\ we use >= instead of > for MAX, since this gives one
\ less instruction in the case 0 MAX. But for MIN,
\ using < gives TWO less instructions than <= (just 2)
: UMAX inline{ 2dup u> dup not rot and down and or} ;
: UMIN inline{ 2dup u< dup not rot and down and or} ;
\ Here u>/u< gives one less instruction than
\ u>=/u<=.
: ABS inline{ dup 31 a>> tuck + xor} ; \ yep, it works!!
\ +- ( n1 n2 -- n3 ) negates n1 if n2 is negative. I like this name
\ better than ?negate, since n2 isn't a flag. Note that ABS is
\ equivalent to DUP +-.
: +- inline{ 31 a>> tuck + xor} ;
: #ALIGN4 inline{ 3+ -4 and} ; \ other alignment words are in pnuc3,
\ but we need this one earlier.
: EXTEND inline{ 16 << 16 a>>} ;
: S>D inline{ dup 31 a>>} ;
\ =============================
\ LOGICAL OPERATIONS
\ =============================
\ NOT and INVERT are synonyms.
$ BD06 otNOT special_op NOT
$ BD06 otNOT special_op INVERT
$ BD06 otAND special_op AND
$ BD06 otOR special_op OR
$ BD06 otXOR special_op XOR
\ Logical operations directly on a memory byte. We define these as inlines,
\ since the'll only generate a few instructions.
: CSET \ ( c addr -- ) ORs c into the byte at addr.
inline{ dup c@ rot or swap c!} ;
: CRESET \ ( c addr -- ) clears bits in byte at addr, corresponding
\ to the bits SET in c.
inline{ dup c@ rot not and swap c!} ;
: CTOGGLE \ ( c addr -- ) Exclusive-ORs c into the byte at addr.
inline{ dup c@ rot xor swap c!} ;
: CREPLACE \ ( c mask addr -- )
\ Replaces bits in the addressed byte with the corresponding
\ bits from c, in those positions where the mask has ones.
inline{ 2dup c@ swap not and 2swap and or swap c!} ;
(*
: CREPLACE { c mask addr -- }
addr c@ mask not and c mask and or addr c! ;
*)
\ Logical operations on a memory bit - now omitted. Almost unused.
\ ===========================
\ COMPARISONS
\ ===========================
$ BD10 $ 2607 special_op =
$ BD10 $ 2606 special_op <>
$ BD10 $ 260C special_op >=
$ BD10 $ 260D special_op <
$ BD10 $ 260F special_op <=
$ BD10 $ 260E special_op >
$ BD10 $ 2605 special_op U<
$ BD10 $ 2603 special_op U<=
$ BD10 $ 2602 special_op U>
$ BD10 $ 2604 special_op U>=
$ BD10 $ 2617 special_op 0=
$ BD10 $ 2616 special_op 0<>
$ BD10 $ 261C special_op 0>=
$ BD10 $ 261D special_op 0<
$ BD10 $ 261F special_op 0<=
$ BD10 $ 261E special_op 0>
$ BD2A cmpEQ special_op F=
$ BD2A cmpNE special_op F<>
$ BD2A cmpGE special_op F>=
$ BD2A cmpLT special_op F<
$ BD2A cmpLE special_op F<=
$ BD2A cmpGT special_op F>
\ FP:
$ BD2A cmpZEQ special_op F0=
$ BD2A cmpZNE special_op F0<>
$ BD2A cmpZGE special_op F0>=
$ BD2A cmpZLT special_op F0<
$ BD2A cmpZLE special_op F0<=
$ BD2A cmpZGT special_op F0>
: WITHIN? \ ( n lo hi -- n b ) Returns true if lo <= n <= hi.
\ We define it inline which involves a lot of stack juggling,
\ but all that gets taken out at compile time, so the compiled
\ code is actually optimum.
inline{ rot tuck >= down tuck <= rot and} ;
(* that surely needs an explanation:
rot ( lo hi n )
tuck ( lo n hi n )
>= down ( b lo n )
tuck ( b n lo n )
<= ( b n b' )
rot and
*)
: UWITHIN? \ ( u lo hi -- u b ) An unsigned version of WITHIN?
inline{ rot tuck u>= down tuck u<= rot and} ;
\ ===========================
\ FETCHES AND STORES
\ ===========================
$ 6102 0 fetch_op @
$ 6102 0 fetch_op >PTR \ In our system, this is an alias for @.
$ 6101 0 fetch_op W@
$ 6101 1 fetch_op W@X
$ 6100 0 fetch_op C@
$ 6100 1 fetch_op C@X
$ BD32 simple_op F@
$ BD33 simple_op F!
$ BD42 simple_op SF@
$ BD43 simple_op SF!
$ BD08 $ 6002 special_op !
$ BD08 $ 2102 special_op +!
$ BD08 $ 2202 special_op -!
$ BD08 $ 6001 special_op W!
$ BD08 $ 2101 special_op W+!
$ BD08 $ 2201 special_op W-!
$ BD08 $ 6000 special_op C!
\ ============================================
\ DO LOOPS and RETURN STACK OPERATIONS
\ ============================================
(* Note: >R, R> and R@ are defined already (at the start of Setup, since
I needed >R so it was logical to put them all there).
We keep the loop index I in a reg, but the return stack is entirely
in memory, except that in leaf words we don't save/restore the link
register. This means that I can be used in words called from within
DO loops. In fact I can be used as another local variable. But this
is non-standard, so not a good idea. But it's useful for testing.
During DO loops, the info for any containing DO loop is saved on the
return stack in the order I (on top), limit, count register. Thus
J is at offset zero off r17 (rtn stk ptr), and K is at offset 12.
*)
I_reg gpr I
: J inline{ RP @} ;
: K inline{ RP 8 + @} ;
\ =========================
\ OBJECT ADDRESSING
\ =========================
(*
^BASE and SELF give the base address of the current object. There are two
words because "base address of the current object" can have two meanings, thanks
to multiple inheritance. ^BASE give what we might call the local base - the
base address of the current object considered as an object of the class in which
the ^BASE appears or from which it is called. In other words, this is the
address of the first ivar of the class in which the current method is declared.
Note that all ivars of this class will be at fixed offsets from ^BASE.
However with multiple inheritance, these ivars might be preceded by ivars of
a different (inherited) class. SELF simply the base address of the current
(dynamic) object, that is, the address of the first ivar (regardless of which
class it's inherited from). SELF and ^BASE might give identical results, but
if they differ, SELF must be lower.
Compiled code in methods needs to access ivars using ^BASE, since the offsets
are fixed. The offset from SELF of a given ivar might be different in different
objects. For this reason we keep ^BASE in a machine register, but compute SELF.
*)
\ obj_base_reg reg ^BASE
: SELF ( -- addr )
\ Returns the "real" base addr of the current object.
(^base) 4- dup w@x + \ ^class addr
8 + ; \ forward to beginning of obj data
\ =========================
\ SEGMENT HANDLING
\ =========================
(*
A segment table (ST) entry is 8 bytes long:
byte 0 flags
bytes 1-3 length of segment
bytes 4-7 base addr
A free segment is marked by bytes 0-3 being all zero. Currently
we don't have such a thing as a zero-length segment, though if
this ever became useful we could define a flag bit to mean a
segment isn't free, so that bytes 0-3 being all zero would still
mean the seg is free.
Note that 2^^24 is more than adequate for a maximum length, so
there's no problem with using the hi byte for flags.
The maximum number of available ST slots is max_segs. We number
segments from 8 up, corresponding to the hi byte of relocatable
addresses. Thus the highest legal seg# is max_segs + 7.
(Offsetting the seg# in this way makes handling reloc addrs
slightly easier, and also means that zero is illegal as a reloc
addr - probably a good idea.)
*)
: get_free_seg_pair { \ ^entry -- ^entry n }
max_segs 2
DO i 8 * segTable + -> ^entry
^entry @ ^entry 8 + @ or
NIF \ found the first free even-odd pair
1 ^entry ! \ give them a dummy length of 1 so we can see
1 ^entry 8 + ! \ they're not free
^entry i 8 + UNLOOP EXIT
THEN
2 +LOOP
208 die \ table full! Help!!
;
: segTable_entry \ ( seg# -- ^entry )
8 -
0 max_segs within? NIF 207 die THEN
8 * segTable +
;
: make_seg_absent \ ( seg# -- )
segTable_entry 4+
nilP swap ! \ nilP means it's absent. Note we leave the
\ length alone, since the seg is still assigned
\ to somebody.
;
: free_seg \ ( seg# -- )
segTable_entry
0 over !
nilP swap 4+ !
;
: addr>S&D { addr \ ^entry BA len xx -- seg# displ }
\ compMod $ 10000000 u> if dbgr then
\ addr $ 1000 u< if dbgr then
compMod 0<> comp_seg# and
IF \ we're compiling a module. Is it in that module?
comp_seg# 8 - 8 * segTable + -> ^entry
^entry @ $ 00ffffff and -> len
^entry 4+ @ -> BA
addr
BA dup len +
uwithin? \ in code area?
IF \ yes!
BA - comp_seg# swap EXIT
THEN
^entry 8 + @ $ 00ffffff and -> len ^entry 12 + @ -> BA
( addr )
BA dup len +
uwithin?
IF
BA - comp_seg# 1+ swap EXIT
THEN drop
THEN
max_segs 0
DO i 8 * segTable + -> ^entry
^entry @ $ 00ffffff and -> len
len
IF \ something there
^entry 4+ @ -> BA
BA nilP <>
IF \ seg is present
addr
BA dup len + uwithin?
IF \ found! addr is within this segment
BA -
i 8 + swap UNLOOP EXIT
ELSE drop
THEN
THEN
THEN
LOOP
0 0 \ search failed - return two zeros
;
\ =============================
\ MISCELLANEOUS LOW-LEVEL WORDS
\ =============================
\ SP@ should really only be used for stack dumping. Therefore the
\ main job is to ensure the memory part of the stack is updated to
\ what's in the regs.
:ppc_code SP@
$ 0100 codeHere 2- w! \ change flags to specify 1 result in regs. This
\ simplifies things, since this 1 result is just
\ the updated data stack pointer.
r3 -4 rSP stw,
r4 -8 rSP stwu,
r3 rSP mr,
blr,
;ppc_code
: SP! -> SP ;
RP_reg gpr RP@ \ synonym for RP in Mops
: RP! 0 -> exBuff_offs -1 -> (^base)
-> RP ;
: FSP! -> FSP ;
: BOUNDS inline{ over + swap} ;
: HERE inline{ dp} ;
: ALLOT inline{ ++> dp} ;
: ROOM ( -- n )
code_limit CDP - ;
: ROOM2 ( -- code-room data-room )
code_limit CDP - data_limit DP - ;
: HEADROOM ( -- n ) \ On the 68k, returns the distance from DP to the top of
\ the A4 addressing range. Here on the PPC we make it
\ the distance from CDP to the top of the mainCode addressing
\ range - that's probably somewhat useful, though the
\ distance from DP to the top of the mainData addressing
\ range would be useful as well.
mainCode half_displ_range + CDP - ;
: UNUSED inline{ room} ;
: COUNT inline{ dup 1+ swap c@} ;
: LENGTH inline{ dup 2+ swap w@} ;
: DEPTH
SP0 SP - 4/ 2+ ; \ we have 2 cells in regs on entry
: FDEPTH
FSP0 FSP - 3 a>> 2+ ; \ ditto
: DIGIT { char #base -- b }
false
char & z u> ?EXIT \ if above LC letters, fail
char & a u>= IF $ DF and> char THEN \ LC letter -> UC
$ 30 --> char \ '0'-'9' -> 0-9
char 0< ?EXIT \ if not a digit, fail
char 10 >=
IF 7 --> char \ A-Z -> 10-35
char 10 < ?EXIT \ but if not a letter, fail
THEN
char #base >= ?EXIT \ if char now > base, fail
drop char true \ if we got here, success!
;
: DECIMAL 10 -> base ;
: HEX 16 -> base ;
(* HASH produces a 32-bit hash value. We always set the top bit
(so that a hashed value is never zero, and is always distinguishable
from a relocatable address, which is always "positive").
This means that we effectively have 2**31 hash possibilities. This is
large enough that hash collisions should hardly ever occur.
If a 16-bit hash value is required, as in Neon, use wHash.
We use assembly for the inner loop, mainly because we don't yet have
a good way of specifying a rotate in high-level. But it's interesting
that I hardly ever have to resort to assembly for anything any more...
*)
:ppc_code (hash) \ ( addr -- hash )
r5 $ 12345678 lli,
r5 8 srwi,
r0 0 r4 lbz,
r0 $ 7F andi.,
r0 mtctr,
rX r4 1 addi,
r4 r0 0 addi,
begin,
r4 r4 7 0 31 rlwinm,
r0 0 rX lbz,
rX 1 addi,
r4 r0 xor,
dnz until,
blr,
;ppc_code
: HASH
(hash)
dup 0> IF not THEN ;
: WHASH
(hash)
dup $ FFFF and
swap 16 >> xor ;
: <^ELEM> { index \ addr -- addr index }
\ Returns addr of indexed element in current object.
\ (^base) dup 4- w@x + \ ^class addr
\ dup 6 + w@x + -> addr \ indexed base addr
\ index addr 4- @ u> ?trap \ trap if out of range - note we store
\ limit-1 in the object, so equal is OK.
(^base) 2- dup w@x + -> addr \ indexed area base addr
index addr 4- @ u> ?trap \ trap if out of range - note we store
\ limit-1 in the object, so equal is OK.
addr index ;
: (^ELEM)
<^elem> over 6 - w@ * + ; \ compiled by ^ELEM if we're not
\ expanding an inline defn - see
\ qClass.
: ^ELEM1 <^elem> + ;
: ^ELEM2 <^elem> dup + + ;
: ^ELEM4 <^elem> 4* + ;
: IDXBASE { \ addr -- addr }
\ Returns start addr of indexed area in current object.
\ (^base) dup 4- w@x + \ ^class addr
\ dup 2- w@x + -> addr \ indexed base addr
(^base) 2- dup w@x + -> addr \ indexed area base addr
addr 4- @ 0< ?trap \ trap if not indexed
addr ;
: LIMIT
idxbase 4- @ 1+ ; \ we store limit-1 in the object
(* We still need PACK and UNPACK on the PPC, since the Toolbox
takes and returns a packed Point in a couple of places.
Note these numbers are signed.
*)
: PACK ( n1 n2 -- n2:n1 )
16 << swap $ ffff and or ;
: UNPACK ( n2:n1 -- n1 n2 )
dup 16 << 16 a>> swap 16 a>> ;
(* Extra multiplication and division words.
On the 68k, we dispensed with all double length (64-bit) arithmetic
in the nucleus, since the hardware didn't provide it. We used a kludged
version of I/O words such as #, in which we just ignored the high
word. We required loading of an extra file (longMath) if the real
64-bit words were needed. However here we do provide a few 64-bit
words since 32*32->64 is easy, and the Compiler Writers' Guide has
given us a 64/64->64 division routine. This means that we don't have
to kludge # et al, and don't need a PowerPC version of longMath.
*)
: /MOD inline{ 2dup / -> rX rX * - rX} ;
\ tried assembly, but the code compiled by this was
\ identical, except for reg numbers :-)
: U/MOD inline{ 2dup u/ -> rX rX * - rX} ;
: MOD inline{ /mod drop} ;
(* 64-bit division on 32-bit PowerPC implementations isn't easy, since
they took away the MQ register, and left us with only 32/32->32
instructions.
But fortunately the Compiler Writer's Guide tells us how to do
64/64->64, so here we go...
*)
:ppc_code UD/MOD ( ud_dvd ud_dsr -- ud_rem ud_quot )
(* On entry: divisor = r4:r3, dividend in 0(rSP):4(rSP), and
we move it to r6:r5.
We use tmp = r8:r7. r0 and r10 are scratch.
Note the dividend is only 64 bits, instead of the 128 that
would normally go with a 64 bit divisor. We assume the high
64 bits are zero. This means that no divisor/dividend
combinations can overflow, unless the divisor is zero.
Note also we put the most significant cell second in the
registers, because that's the way the regs get passed in to us
and the way we need to return them, and it's less confusing
to be consistent all the way through - once we get over
the confusion of having the registers this way around.
*)
\ first we check for zero divisor
r0 r3 r4 or.,
ne if,
r6 0 rSP lwz, \ get dividend to r6:r5
r5 4 rSP lwz,
\ first we count the leading zeros in the dividend -> r0
r6 0 cmpi, \ dvd(hi) = 0?
r0 r6 cntlzw, \ r0 = LZ in dvd(hi)
r9 r5 cntlzw, \ r9 = LZ in dvd(lo)
eq if, \ if dvd(hi) = 0
r0 r9 32 addi, \ LZ = LZ(lo) + 32
then,
\ now we count the leading zeros in the divisor -> r9
r4 0 cmpi, \ dsr(hi) = 0?
r9 r4 cntlzw, \ r9 = LZ in dsr(hi)
r10 r3 cntlzw, \ r10 = LZ in dsr(lo)
eq if, \ if dsr(hi) = 0
r9 r10 32 addi, \ LZ = LZ(lo) + 32
then,
\ now we work out the shift amounts to minimize the number of
\ iterations.
r0 r9 cmp, \ compare dvd LZ to dsr LZ
r10 r0 64 subfic, \ r10 = dividend sig digits (SD)
le if, \ if divisor > dividend we keep going,
\ otherwise we set quotient to 0
r9 r9 1 addi,
r9 r9 64 subfic, \ r9 = divisor SD
r0 r0 r9 add, \ r0 = dvd LZ + dsr SD, i.e. left shift
\ of dvd for initial tmp
r9 r9 r10 subf, \ r9 = dvd SD - dsr SD i.e. right shift
\ of dividend for initial temp
r9 mtctr, \ ..which is also the number of iterations.
\ now we set up r8:r7:r6:r5 as the classic division register whose length
\ is the sum of the quotient and remainder lengths - in our case, 128 bits.
\ First, the hi-order part (r8:r7) is the dividend, right shifted by r9
\ (the number of iterations).
r9 32 cmpi, \ r9 ? 32
r8 r9 -32 addi,
ge if, \ r9 >= 32:
r7 r6 r8 srw, \ lo word = dvd(hi) >> (r9-32)
r8 0 li, \ hi word = 0
else, \ r9 < 32:
r7 r5 r9 srw, \ lo word = dvd(lo) >> r9
r8 r9 32 subfic,
r8 r6 r8 slw, \ r8 = dvd(hi) << (32-r9)
r7 r7 r8 or, \ OR that into lo word
r8 r6 r9 srw, \ hi word = dvd(hi) >> r9
then,
\ Now the lo-order part of the division register (r6:r5) is the
\ dividend left shifted by r0.
r0 32 cmpi, \ r0 ? 32
r9 r0 -32 addic,
ge if, \ r0 >= 32:
r6 r5 r9 slw, \ hi word = dvd(lo) << (r0-32)
r5 0 li, \ lo word = 0
else, \ r0 < 32:
r6 r6 r0 slw, \ hi word = dvd(hi) << r0
r9 r0 32 subfic,
r9 r5 r9 srw, \ r9 = dvd(lo) >> (r0-32)
r6 r6 r9 or, \ OR that into hi word
r5 r5 r0 slw, \ lo word = dvd(lo) << r0
then,
\ Now for the main restoring division shift and subtract loop.
\ With each shift we subtract the divisor from the top half of
\ the 128-bit "register", but only use the result if it's positive.
\ In this case we shift in a 1 into the low bit position. Otherwise
\ we shift in a 0. This will be the next bit of the quotient.
\ At the end of the loop, we'll have the remainder in the high
\ half, and the quotient in the low half.
r10 -1 li, \ r10 = -1 for carry setting
r8 r8 0 addic, \ clear carry initially
CDPx \ loop start
r5 r5 r5 adde, \ here we shift the long register
r6 r6 r6 adde, \ left one place by adding each
r7 r7 r7 adde, \ portion to itself, with carry
r8 r8 r8 adde,
r0 r3 r7 subfc, \ Subtract divisor from hi half
r9 r4 r8 subfe., \ of long register -> r9:r0
ge if, \ Result was positive, so we use it
r7 r0 mr, \ move result to hi half of long reg
r8 r9 mr,
r0 r10 1 addic, \ and set carry bit -
then, \ carry bit will come into the lo
\ bit position of the long reg on
\ the next shift.
dnz bc, \ loop
\ now we write the results. The quotient is in the lo half of the long
\ reg, but needs one more shift, bringing the carry into the lo bit.
\ At the same time we get the quotient to r4:r3, where we want it.
r3 r5 r5 adde,
r4 r6 r6 adde,
\ The remainder is in r8:r7 - we now put it back into the memory part
\ of the stack, where the original dividend came from. As we always
\ return 2 cells in registers from a code definition, we'll now
\ have the remainder under the quotient, as required.
r7 4 rSP stw,
r8 0 rSP stw,
blr,
then,
\ if we got here, the divisor > dividend, so the quotient is zero
\ and remainder = dividend. The remainder is already in the right
\ place so we only have to clear the quotient (r4:r3).
r3 0 li,
r4 0 li,
blr,
then,
\ and if we got here, the divisor was zero. We THROW the code -10, which
\ means "division by zero".
r4 -10 li,
r0 ' throw 2+ dicaddr,
r0 mtctr,
bctr,
;ppc_code
: UM/MOD ( d u -- urem uquot )
0 ud/mod drop nip ;
: UMD/MOD ( ud_dvd u_dsr -- u_rem ud_quot )
0 ud/mod rot drop ;
: M/MOD ( d n ) { \ dvdSgn dsrSgn -- rem quot }
false -> dvdSgn
s>d dup -> dsrSgn tuck + xor
over 0< IF down dnegate rot true -> dvdSgn THEN
um/mod
\ now we set the sign of the quotient - negative if the
\ signs of the dividend and divisor differed.
dvdSgn dsrSgn xor tuck + xor
\ now we set the sign of the remainder - same as dividend.
swap dvdSgn tuck + xor swap
;
: */MOD inline{ -> rY m* rY m/mod} ;
: */ inline{ */mod nip} ;